# prevent library(sqldf) from triggering a tcl/tk dependency which causes R to exit on OS X if X11 isn’t installed. See https://code.google.com/p/sqldf/ for troubleshooting details.
options(gsubfn.engine = "R")
library('sqldf')
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
library('ggplot2')
library('zoo')
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library('reshape2')


# days : increasing sequence of integers
# k : positive number 
# return data frame days,prev where prev=max day<=prev-k
lagDays <- function(days,k) {
  prevI <- 1
  dayVec <- numeric(length(days))
  prevVec <- numeric(length(days))
  i <- 0
  for(curI in seq(2,length(days))) {
    while((prevI<curI-1)&&(days[prevI+1]<=days[curI]-k)) {
      prevI <- prevI + 1
    }
    if(days[prevI]<=days[curI]-k) {
      i <- i+1
      dayVec[[i]] <- days[curI]
      prevVec[[i]] <- days[prevI]
    }
  }
  data.frame(days=dayVec[seq_len(i)],prev=prevVec[seq_len(i)],
             stringsAsFactors = FALSE)
}

compReturn <- function(Investment,k,colName) {
  lags <- lagDays(Investment$dayCount,k)
  rets <- sqldf('
    SELECT
      sp1.dayCount dayCount,
      sp1.dayCount-sp2.dayCount width,
      (sp1.Close - sp2.Close)/sp2.Close Return
    FROM
      lags
    INNER JOIN
      Investment sp1
    ON
      sp1.dayCount=lags.days
    INNER JOIN
      Investment sp2
    ON
      sp2.dayCount=lags.prev
  ')
  rets[[colName]] <- ((1+rets$Return)^(k/rets$width)-1)
  rets <- rets[,c('dayCount',colName),drop=FALSE]
  r <- merge(Investment,rets,by='dayCount')
  r
}


SharpeRatio <- function(Investment,column,window) {
  means <- rollapply(Investment[[column]],width=window,FUN=mean)
  means <- c(rep(NA,nrow(Investment)-length(means)),means)
  sds <- rollapply(Investment[[column]],width=window,FUN=sd)
  sds <- c(rep(NA,nrow(Investment)-length(sds)),sds)
  sr <- means/sds
  Investment[[paste('Mean',column,sep='_')]] <- means
  Investment[[paste('StdDev',column,sep='_')]] <- sds
  Investment[[paste('SharpeRatio',column,sep='_')]] <- sr
  Investment
}



daysInYear <- 365.242
daysInMonth <- daysInYear/12
dayMap <- list('DailyReturn'=1,
              'TenDayReturn'=10,
              'MonthlyReturn'=daysInMonth,
              'AnnualReturn'=daysInYear)


refName='TNX.csv'

for(instName in c('SP.csv','TENZ.csv')) {
  print('********************************************************')
  print(paste(refName,'vs',instName))
  
  # comparison safe invesement: 10-year treasury
  refInstrument <- read.table(refName,
                              header=TRUE,
                              sep=',',
                              stringsAsFactors=FALSE)
  # confirm assumption length(unique(refInstrument$Date))==nrow(refInstrument)
  if(length(unique(refInstrument$Date))!=nrow(refInstrument)) {
    stop("duplicate day")
  }
  refInstrument$RefAnnualReturn <- refInstrument$Close/100
  refInstrument <- refInstrument[,c('Date','RefAnnualReturn'),
                                 drop=FALSE]
  
  for(nm in names(dayMap)) {
    if(nm!='AnnualReturn') {
      refInstrument[[paste('Ref',nm,sep='')]] <- 
        (1+refInstrument$RefAnnualReturn)^(dayMap[[nm]]/daysInYear) - 1
    }
  }
  
  
  
  # Investment we are analyzing 
  Investment <- read.table(instName,
                           header=TRUE,
                           sep=',',
                           stringsAsFactors=FALSE)
  # confirm assumption length(unique(Investment$Date))==nrow(Investment)
  if(length(unique(Investment$Date))!=nrow(Investment)) {
    stop("duplicate day")
  }
  Investment <- Investment[,c('Date','Close')]
  
  # restrict down to days we have both infos and merge refInstrument into Investment
  commonDays <- intersect(refInstrument$Date,Investment$Date)
  refInstrument <- refInstrument[refInstrument$Date %in% commonDays,,drop=FALSE]
  Investment <- Investment[Investment$Date %in% commonDays,,drop=FALSE]
  Investment <- merge(Investment,refInstrument,by='Date')
  
  # number the days
  day <- as.POSIXct(Investment$Date,tz="UTC",format="%Y-%m-%d")
  baseDay <- min(day)
  Investment$dayCount <- as.numeric(difftime(day,baseDay,units='days'))
  # confirm assumption length(unique(refInstrument$dayCount))==nrow(refInstrument)
  if(length(unique(Investment$dayCount))!=nrow(Investment)) {
    stop("duplicate day")
  }
  Investment$day <- day
  # get Investment into dates moving forward order
  Investment <- Investment[order(Investment$dayCount),,drop=FALSE]
  
  
  for(icol in names(dayMap)) {
    print('*****************************************')
    window <- min(500,max(10,round(3*dayMap[[icol]])))
    stepName <- paste(instName,'vs',refName,icol)
    print(stepName)
    Investment <- compReturn(Investment,dayMap[[icol]],icol)
    col <- paste('D',icol,sep='')
    # get the Difference in returns or excess return
    Investment[[col]] <- Investment[[icol]] - Investment[[paste('Ref',icol,sep='')]]
    print(ggplot(Investment[!is.na(Investment[[col]]),,drop=FALSE],
                 aes_string(x='day',y=col)) +
            geom_point() + geom_smooth() +
            ggtitle(stepName))
    ncol <- paste('SharpeRatio',col,sep='_')
    Investment <- SharpeRatio(Investment,col,window)
    meanCol <- paste('Mean',col,sep='_')
    stdCol <- paste('StdDev',col,sep='_')
    msPlotFrame <- melt(Investment[!is.na(Investment[[stdCol]]),
                                   c('day',meanCol,stdCol),drop=FALSE],
                        id='day',
                        value.name='return')
    print(ggplot(msPlotFrame,
                 aes_string(x='day',y='return',
                            color='variable',linetype='variable')) + 
            geom_line() +
            scale_color_manual(values=c("green", "red")) +
            ggtitle(paste(stepName,'\nreturn mean/stdev window=',window,'days')) +
            theme(legend.position="bottom"))
    print(summary(Investment[[ncol]]))
    print(ggplot(Investment[!is.na(Investment[[ncol]]),,drop=FALSE],
                 aes_string(x='day',y=ncol)) + 
            geom_point() + 
            ggtitle(paste(stepName,'\nSharpe ratio window=',window,'days')))
    print('*****************************************')
  }
  print('********************************************************')
}
## [1] "********************************************************"
## [1] "TNX.csv vs SP.csv"
## [1] "*****************************************"
## [1] "SP.csv vs TNX.csv DailyReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -1.70000 -0.18700  0.05933  0.05265  0.29580  1.89400        9

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "SP.csv vs TNX.csv TenDayReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -2.77300 -0.29330  0.07500  0.08287  0.44890  2.65600       29

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "SP.csv vs TNX.csv MonthlyReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -1.59400 -0.28750  0.09392  0.12310  0.48180  2.71300       90

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "SP.csv vs TNX.csv AnnualReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -3.7610 -0.3359  0.1680  0.3037  0.9634  3.4340     499

## [1] "*****************************************"
## [1] "********************************************************"
## [1] "********************************************************"
## [1] "TNX.csv vs TENZ.csv"
## [1] "*****************************************"
## [1] "TENZ.csv vs TNX.csv DailyReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
## -88.34000  -0.21080   0.00586  -0.09961   0.21600   0.96070         9

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "TENZ.csv vs TNX.csv TenDayReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -2.17800 -0.34140  0.01509  0.01901  0.35850  2.08800       29

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "TENZ.csv vs TNX.csv MonthlyReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -1.54000 -0.28400  0.01201  0.05131  0.37610  1.97000       90

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "TENZ.csv vs TNX.csv AnnualReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -1.0400 -0.7466 -0.1724 -0.1326  0.6514  0.7719     499

## [1] "*****************************************"
## [1] "********************************************************"